

# Setup ------------------------------------------------------------------------
  options(stringsAsFactors = F)
  library(pacman)
  p_load(magrittr, data.table, rgeos, rgdal)
  # Directories
  dir_research <- "/Users/edwardarubin/Dropbox/Research/MyProjects/"
  dir_gas      <- dir_research %>% paste0("NaturalGas/")
  dir_rds      <- dir_gas %>% paste0("DataR/")
  dir_results  <- dir_gas %>% paste0("DataR/Results/Results20171024/")
  dir_spatial  <- dir_gas %>% paste0("DataSpatial/")

# Load zip code shapefile ------------------------------------------------------
  # Load it
  zip_shp <- readOGR(
    dsn = paste0(dir_spatial, "CaliforniaZipCodes/"),
    layer = "CaliforniaZipCodes")

# Zip code counts --------------------------------------------------------------
  # Load monthly zip code summaries
  zip_pge <- paste0(dir_rds, "Summaries/zip5PGE.rds") %>% readRDS()
  zip_socal <- paste0(dir_rds, "Summaries/zip5SoCal.rds") %>% readRDS()
  # Aggregate to median of HH count (by zip)
  zip_pge <- zip_pge[,
    .(hh_count = median(hh_count) %>% round(0)),
    by = zip5]
  zip_socal <- zip_socal[,
    .(hh_count = median(hh_count) %>% round(0)),
    by = zip5]
  # Change names
  setnames(zip_pge, c("zip", "count_pge"))
  setnames(zip_socal, c("zip", "count_socal"))
  # Merge by zip
  zip_dt <- merge(x = zip_pge, y = zip_socal, by = "zip", all = T)
  # Change NAs to zeros
  zip_dt[is.na(count_pge), count_pge := 0]
  zip_dt[is.na(count_socal), count_socal := 0]
  # Add indicators for utility presence
  zip_dt[, `:=`(pge = F, socal = F)]
  zip_dt[count_pge > 0, pge := T]
  zip_dt[count_socal > 0, socal := T]

# Find the zip codes that touch our central zip codes --------------------------
  # The zip codes served by both utitlities
  zips_common <- zip_dt[pge == T & socal == T, zip]
  # Add row number (starting at 0) to the shapefile
  zip_shp@data$ROW <- 0:(nrow(zip_shp) - 1)
  # Find the zip codes that border the zip codes served by both utilities
  zips_touch1 <- gTouches(
    spgeom1 = subset(zip_shp, ZIP_CODE %in% zips_common),
    spgeom2 = zip_shp,
    byid = T)
  # Find maximum of rows in zips_touch; convert to data table
  zips_touch1 <- data.table(
    zip = zip_shp@data$ZIP_CODE,
    border1 = apply(X = zips_touch1, FUN = max, MARGIN = 1))
  # Repeat for neighbors of neighbors
  zips_touch2 <- gTouches(
    spgeom1 = subset(zip_shp, ZIP_CODE %in% zips_touch1[border1 == 1, zip]),
    spgeom2 = zip_shp,
    byid = T)
  zips_touch2 <- data.table(
    zip = zip_shp@data$ZIP_CODE,
    border2 = apply(X = zips_touch2, FUN = max, MARGIN = 1))
  # Repeat for neighbors of neighbors of neighbors
  zips_touch3 <- gTouches(
    spgeom1 = subset(zip_shp, ZIP_CODE %in% zips_touch2[border2 == 1, zip]),
    spgeom2 = zip_shp,
    byid = T)
  zips_touch3 <- data.table(
    zip = zip_shp@data$ZIP_CODE,
    border3 = apply(X = zips_touch3, FUN = max, MARGIN = 1))
  # Repeat again
  zips_touch4 <- gTouches(
    spgeom1 = subset(zip_shp, ZIP_CODE %in% zips_touch3[border3 == 1, zip]),
    spgeom2 = zip_shp,
    byid = T)
  zips_touch4 <- data.table(
    zip = zip_shp@data$ZIP_CODE,
    border4 = apply(X = zips_touch4, FUN = max, MARGIN = 1))
  # Repeat one last time
  zips_touch5 <- gTouches(
    spgeom1 = subset(zip_shp, ZIP_CODE %in% zips_touch4[border4 == 1, zip]),
    spgeom2 = zip_shp,
    byid = T)
  zips_touch5 <- data.table(
    zip = zip_shp@data$ZIP_CODE,
    border5 = apply(X = zips_touch5, FUN = max, MARGIN = 1))
  # Create a data table for the study area (common zips)
  zips_touch0 <- data.table(zip = zip_shp@data$ZIP_CODE)
  zips_touch0[, border0 := 1 * (zip %in% zips_common)]
  # Combine neighbor datasets
  border_dt <- merge(zips_touch0, zips_touch1, by = "zip", all = T) %>%
    merge(zips_touch2, by = "zip", all = T) %>%
    merge(zips_touch3, by = "zip", all = T) %>%
    merge(zips_touch4, by = "zip", all = T) %>%
    merge(zips_touch5, by = "zip", all = T)
  # Add variable for neighbor group (group 0 is the common-zip-code group)
  border_dt[, group :=
    6 - (border0 + border1 + border2 + border3 + border4 + border5)]
  # Join the pge and socal variables
  border_dt %<>% merge(zip_dt, by = "zip", all = T)
  # Limit to zip codes in groups 0-5
  border_dt <- border_dt[group <= 5 & !is.na(pge)]
  # Save
  saveRDS(
    object = border_dt,
    file = paste0(dir_rds, "zipCodeGroups.rds"))
